# package
require("readr")
require("haven")
require("labelled")
require("dplyr")
require("tidyr")
require("scales")
require("autoMrP")
theme_set(theme_bw(base_size = 12))

# set working directory
setwd()

# census data by prefecture
census <- read_csv(
  "PREFECTURE_STATISTICS_2003_2023.csv", 
  locale = locale(encoding = "CP932")
)

# UTokyo-Asahi voter survey
## 2003
voter2003 <- read_csv(
  "utas060104_2003.csv", 
  locale = locale(encoding = "CP932")
)
voter2003$PREFECTURE <- gsub(
  pattern = "0|1|2|3|4|5|6|7|8|9", replacement = "", voter2003$district
)
voter2003$PREFECTURE <- recode(
  voter2003$PREFECTURE, 
  `北海道` = 1, `青森` = 2, `岩手` = 3, `宮城` = 4, 
  `秋田` = 5, `山形` = 6, `福島` = 7, `茨城` = 8, 
  `栃木` = 9, `群馬` = 10, `埼玉` = 11, `千葉` = 12, 
  `東京` = 13, `神奈川` = 14, `新潟` = 15, `富山` = 16, 
  `石川` = 17, `福井` = 18, `山梨` = 19, `長野` = 20, 
  `岐阜` = 21, `静岡` = 22, `愛知` = 23, `三重` = 24, 
  `滋賀` = 25, `京都` = 26, `大阪` = 27, `兵庫` = 28, 
  `奈良` = 29, `和歌山` = 30, `鳥取` = 31, `島根` = 32, 
  `岡山` = 33, `広島` = 34, `山口` = 35, `徳島` = 36, 
  `香川` = 37, `愛媛` = 38, `高知` = 39, `福岡` = 40, 
  `佐賀` = 41, `長崎` = 42, `熊本` = 43, `大分` = 44, 
  `宮崎` = 45, `鹿児島` = 46, `沖縄` = 47
)
voter2003 <- voter2003 %>% 
  mutate(ID = 1:nrow(voter2003), 
         YEAR = 2003, 
         SEX = recode(f010100, `0` = 1, `1` = 2, .default = NA_real_), 
         AGE = recode(f010200, `1` = 1, `2` = 1, `3` = 2, `4` = 2, `5` = 3, `6` = 4, `7` = 5, `8` = 6, .default = NA_real_), 
         EDUCATION = recode(f020600, `1` = 1, `2` = 2, `3` = 3, `4` = 4, .default = NA_real_), 
         LDP = q021301, 
         ORDER = NA) %>% 
  select(YEAR, ID, PREFECTURE, SEX, AGE, EDUCATION, LDP, ORDER)
voter2003$LDP <- rescale(voter2003$LDP, to = c(0, 1), from = c(0, 100))
## 2007
voter2007 <- read_csv(
  "utas080112.csv", 
  locale = locale(encoding = "CP932")
)
voter2007 <- voter2007 %>% 
  mutate(ID = 1:nrow(voter2007), 
         YEAR = 2007, 
         PREFECTURE = PREF, 
         SEX = recode(Q016600, `1` = 1, `2` = 2, .default = NA_real_), 
         AGE = recode(Q016700, `1` = 1, `2` = 2, `3` = 3, `4` = 4, `5` = 5, `6` = 6, .default = NA_real_), 
         EDUCATION = recode(Q016900, `1` = 1, `2` = 2, `3` = 3, `4` = 3, `5` = 4, `6` = 4, .default = NA_real_), 
         LDP = Q020501, 
         ORDER = recode(Q020815, `1` = 1, `2` = 2, `3` = 3, `4` = 4, `5` = 5, .default = NA_real_)) %>% 
  select(YEAR, ID, PREFECTURE, SEX, AGE, EDUCATION, LDP, ORDER)
voter2007$LDP <- rescale(voter2007$LDP, to = c(0, 1), from = c(0, 100))
voter2007$ORDER <- rescale(voter2007$ORDER, to = c(0, 1), from = c(1, 5))
voter2007$ORDER <- -1 * voter2007$ORDER + 1
## 2009
voter2009 <- read_sav(
  "http://www.masaki.j.u-tokyo.ac.jp/utas/2009_2010utas130816.sav", 
  user_na = FALSE
)
voter2009 <- mutate_if(voter2009, is.labelled, factor)
voter2009 <- voter2009 %>% 
  mutate(ID = 1:nrow(voter2009), 
         YEAR = 2009, 
         PREFECTURE = as.numeric(PREF), 
         SEX = recode(Q014600, `1` = 1, `2` = 2, .default = NA_real_), 
         AGE = recode(Q014700, `1` = 1, `2` = 2, `3` = 3, `4` = 4, `5` = 5, `6` = 6, .default = NA_real_), 
         EDUCATION = recode(Q014800, `1` = 1, `2` = 2, `3` = 3, `4` = 3, `5` = 4, `6` = 4, .default = NA_real_), 
         LDP = Q012201,
         ORDER = recode(Q013818, `1` = 1, `2` = 2, `3` = 3, `4` = 4, `5` = 5, .default = NA_real_)) %>% 
  select(YEAR, ID, PREFECTURE, SEX, AGE, EDUCATION, LDP, ORDER)
voter2009$LDP[voter2009$LDP == 999] <- NA
voter2009$LDP <- rescale(voter2009$LDP, to = c(0, 1), from = c(0, 100))
voter2009$ORDER <- rescale(voter2009$ORDER, to = c(0, 1), from = c(1, 5))
voter2009$ORDER <- -1 * voter2009$ORDER + 1
## 2012
voter2012 <- read_csv(
  "http://www.masaki.j.u-tokyo.ac.jp/utas/2012-2013UTASV131129.csv", 
  locale = locale(encoding = "CP932")
)
voter2012 <- voter2012 %>% 
  mutate(ID = 1:nrow(voter2012), 
         YEAR = 2012, 
         PREFECTURE = PREFEC, 
         SEX = recode(Q014100, `1` = 1, `2` = 2, .default = NA_real_), 
         AGE = recode(Q014200, `1` = 1, `2` = 2, `3` = 3, `4` = 4, `5` = 5, `6` = 6, .default = NA_real_), 
         EDUCATION = recode(Q014300, `1` = 1, `2` = 2, `3` = 3, `4` = 3, `5` = 4, `6` = 4, .default = NA_real_), 
         LDP = Q012202,
         ORDER = recode(Q013015, `1` = 1, `2` = 2, `3` = 3, `4` = 4, `5` = 5, .default = NA_real_)) %>% 
  select(YEAR, ID, PREFECTURE, SEX, AGE, EDUCATION, LDP, ORDER)
voter2012$LDP[voter2012$LDP == 999] <- NA
voter2012$LDP <- rescale(voter2012$LDP, to = c(0, 1), from = c(0, 100))
voter2012$ORDER <- rescale(voter2012$ORDER, to = c(0, 1), from = c(1, 5))
voter2012$ORDER <- -1 * voter2012$ORDER + 1
## 2014
voter2014 <- read_csv(
  "http://www.masaki.j.u-tokyo.ac.jp/utas/2014_2016UTASV20161004.csv", 
  locale = locale(encoding = "CP932")
)
voter2014 <- voter2014 %>% 
  mutate(ID = 1:nrow(voter2014), 
         YEAR = 2014, 
         PREFECTURE = PREFEC, 
         SEX = recode(W1F1, `1` = 1, `2` = 2, .default = NA_real_), 
         AGE = recode(W1F2, `1` = 1, `2` = 2, `3` = 3, `4` = 4, `5` = 5, `6` = 6, .default = NA_real_), 
         EDUCATION = recode(W1F3, `1` = 1, `2` = 2, `3` = 3, `4` = 3, `5` = 4, `6` = 4, .default = NA_real_), 
         LDP = W1Q15_1,
         ORDER = recode(W1Q16_11, `1` = 1, `2` = 2, `3` = 3, `4` = 4, `5` = 5, .default = NA_real_)) %>% 
  select(YEAR, ID, PREFECTURE, SEX, AGE, EDUCATION, LDP, ORDER)
voter2014$LDP[voter2014$LDP == 999] <- NA
voter2014$LDP <- rescale(voter2014$LDP, to = c(0, 1), from = c(0, 100))
voter2014$ORDER <- rescale(voter2014$ORDER, to = c(0, 1), from = c(1, 5))
voter2014$ORDER <- -1 * voter2014$ORDER + 1
## 2017
voter2017 <- read_csv(
  "http://www.masaki.j.u-tokyo.ac.jp/utas/2017UTASV20200326.csv", 
  locale = locale(encoding = "CP932")
)
voter2017 <- voter2017 %>% 
  mutate(ID = 1:nrow(voter2017), 
         YEAR = 2017, 
         PREFECTURE = PREFEC, 
         SEX = recode(F1, `1` = 1, `2` = 2, .default = NA_real_), 
         AGE = recode(F2, `1` = 1, `2` = 2, `3` = 3, `4` = 4, `5` = 5, `6` = 6, .default = NA_real_), 
         EDUCATION = recode(F3, `1` = 1, `2` = 2, `3` = 3, `4` = 3, `5` = 4, `6` = 4, .default = NA_real_), 
         LDP = Q20_1,
         ORDER = recode(Q23_11, `1` = 1, `2` = 2, `3` = 3, `4` = 4, `5` = 5, .default = NA_real_)) %>% 
  select(YEAR, ID, PREFECTURE, SEX, AGE, EDUCATION, LDP, ORDER)
voter2017$LDP[voter2017$LDP == 999] <- NA
voter2017$LDP <- rescale(voter2017$LDP, to = c(0, 1), from = c(0, 100))
voter2017$ORDER <- rescale(voter2017$ORDER, to = c(0, 1), from = c(1, 5))
voter2017$ORDER <- -1 * voter2017$ORDER + 1
## 2022
voter2022 <- read_csv(
  "http://www.masaki.j.u-tokyo.ac.jp/utas/2022UTASV20220730.csv", 
  locale = locale(encoding = "CP932")
)
voter2022 <- voter2022 %>% 
  mutate(ID = 1:nrow(voter2022), 
         YEAR = 2022, 
         PREFECTURE = as.numeric(Prefecture), 
         SEX = recode(Q43, `1` = 1, `2` = 2, .default = NA_real_), 
         AGE = recode(Q44, `1` = 1, `2` = 1, `3` = 1, `4` = 2, `5` = 2, `6` = 3, `7` = 3, `8` = 4, `9` = 4, 
                      `10` = 5, `11` = 5, `12` = 6, `13` = 6, `14` = 6, `15` = 6, .default = NA_real_), 
         EDUCATION = recode(Q47, `1` = 1, `2` = 2, `3` = 3, `4` = 3, `5` = 4, `6` = 4, .default = NA_real_), 
         LDP = Q41_1, 
         ORDER = recode(Q38_10, `1` = 1, `2` = 2, `3` = 3, `4` = 4, `5` = 5, .default = NA_real_)) %>% 
  select(YEAR, ID, PREFECTURE, SEX, AGE, EDUCATION, LDP, ORDER)
voter2022$LDP[voter2022$LDP == 99] <- NA
voter2022$LDP <- rescale(voter2022$LDP, to = c(0, 1), from = c(0, 10))
voter2022$ORDER <- rescale(voter2022$ORDER, to = c(0, 1), from = c(1, 5))
voter2022$ORDER <- -1 * voter2022$ORDER + 1
## 2023
voter2023 <- read_csv(
  "http://www.masaki.j.u-tokyo.ac.jp/utas/2023UTASV20230416.csv", 
  locale = locale(encoding = "CP932")
)
voter2023 <- voter2023 %>% 
  mutate(ID = 1:nrow(voter2023), 
         YEAR = 2023, 
         PREFECTURE = as.numeric(Prefecture), 
         SEX = recode(Q47, `1` = 1, `2` = 2, .default = NA_real_), 
         AGE = recode(Q48, `1` = 1, `2` = 1, `3` = 1, `4` = 2, `5` = 2, `6` = 3, `7` = 3, `8` = 4, `9` = 4, 
                      `10` = 5, `11` = 5, `12` = 6, `13` = 6, `14` = 6, `15` = 6, .default = NA_real_), 
         EDUCATION = recode(Q47, `1` = 1, `2` = 2, `3` = 3, `4` = 3, `5` = 4, `6` = 4, .default = NA_real_), 
         LDP = Q42_1, 
         ORDER = recode(Q43_12, `1` = 1, `2` = 2, `3` = 3, `4` = 4, `5` = 5, .default = NA_real_)) %>% 
  select(YEAR, ID, PREFECTURE, SEX, AGE, EDUCATION, LDP, ORDER)
voter2023$LDP[voter2023$LDP == 99] <- NA
voter2023$LDP <- rescale(voter2023$LDP, to = c(0, 1), from = c(0, 10))
voter2023$ORDER <- rescale(voter2023$ORDER, to = c(0, 1), from = c(1, 5))
voter2023$ORDER <- -1 * voter2023$ORDER + 1

# MRP
## merge
voter2003.2023 <- bind_rows(voter2003, voter2007, voter2009, voter2012, 
                            voter2014, voter2017, voter2022, voter2023)
voter2003.2023 <- left_join(
  voter2003.2023, census, 
  by = c("YEAR", "PREFECTURE", "SEX", "AGE", "EDUCATION")
)
## estimate
mrp.result <- NULL
year <- c(2003, 2007, 2009, 2012, 2014, 2017, 2022, 2023)
issues <- c("LDP", "ORDER")
for (i in 1:8) {
  for (j in 1:2) {
    if (nrow(na.omit(voter2003.2023[voter2003.2023$YEAR == year[i], issues[j]])) == 0) {
      next
    } else {
      if (i %in% 1:3) {
        mrp <- auto_MrP(
          y = issues[j],
          L1.x = c("SEX", "AGE", "EDUCATION"),
          L2.x = c("DENSITY", "COLLEGE_PERCENT", "FOREIGNER_PERCENT", 
                   "PRIMERY_INDUSTRY_PERCENT", "SECONDARY_INDUSTRY_PERCENT", 
                   "CRIME_RATE", "TURNOUT_PERCENT", "LDP_PERCENT", 
                   "DPJ_PERCENT", "JCP_PERCENT"),
          L2.unit = "PREFECTURE",
          survey = na.omit(voter2003.2023[voter2003.2023$YEAR == year[i],
                                          c(issues[j], "SEX", "AGE", 
                                            "EDUCATION", "PREFECTURE", 
                                            "DENSITY", "COLLEGE_PERCENT", 
                                            "FOREIGNER_PERCENT", 
                                            "PRIMERY_INDUSTRY_PERCENT", 
                                            "SECONDARY_INDUSTRY_PERCENT", 
                                            "CRIME_RATE", "TURNOUT_PERCENT", 
                                            "LDP_PERCENT", "DPJ_PERCENT", 
                                            "JCP_PERCENT")]),
          census = census[census$YEAR == year[i],], 
          bin.proportion = "PROPORTION", 
          best.subset = FALSE,
          pca = FALSE,
          lasso = FALSE,
          gb = FALSE,
          svm = FALSE,
          mrp = TRUE
        )
        mrp.result <- rbind(mrp.result, 
                            data.frame(PREFECTURE = mrp$classifiers[,1], 
                                       YEAR = year[i], 
                                       QUESTION = issues[j], 
                                       POSITON = mrp$classifiers[,2]))
      }
      if (i %in% 4:5) {
        mrp <- auto_MrP(
          y = issues[j],
          L1.x = c("SEX", "AGE", "EDUCATION"),
          L2.x = c("DENSITY", "COLLEGE_PERCENT", "FOREIGNER_PERCENT", 
                   "PRIMERY_INDUSTRY_PERCENT", "SECONDARY_INDUSTRY_PERCENT", 
                   "CRIME_RATE", "TURNOUT_PERCENT", "LDP_PERCENT", 
                   "DPJ_PERCENT", "JCP_PERCENT", "JIP_PERCENT"),
          L2.unit = "PREFECTURE",
          survey = na.omit(voter2003.2023[voter2003.2023$YEAR == year[i],
                                          c(issues[j], "SEX", "AGE", 
                                            "EDUCATION", "PREFECTURE", 
                                            "DENSITY", "COLLEGE_PERCENT", 
                                            "FOREIGNER_PERCENT", 
                                            "PRIMERY_INDUSTRY_PERCENT", 
                                            "SECONDARY_INDUSTRY_PERCENT", 
                                            "CRIME_RATE", "TURNOUT_PERCENT", 
                                            "LDP_PERCENT", "DPJ_PERCENT", 
                                            "JCP_PERCENT", "JIP_PERCENT")]),
          census = census[census$YEAR == year[i],], 
          bin.proportion = "PROPORTION", 
          best.subset = FALSE,
          pca = FALSE,
          lasso = FALSE,
          gb = FALSE,
          svm = FALSE,
          mrp = TRUE
        )
        mrp.result <- rbind(mrp.result, 
                            data.frame(PREFECTURE = mrp$classifiers[,1], 
                                       YEAR = year[i], 
                                       QUESTION = issues[j], 
                                       POSITON = mrp$classifiers[,2]))
      }
      if (i == 6) {
        mrp <- auto_MrP(
          y = issues[j],
          L1.x = c("SEX", "AGE", "EDUCATION"),
          L2.x = c("DENSITY", "COLLEGE_PERCENT", "FOREIGNER_PERCENT", 
                   "PRIMERY_INDUSTRY_PERCENT", "SECONDARY_INDUSTRY_PERCENT", 
                   "CRIME_RATE", "TURNOUT_PERCENT", "LDP_PERCENT", 
                   "CDP_PERCENT", "HOP_PERCENT", "JCP_PERCENT", "JIP_PERCENT"),
          L2.unit = "PREFECTURE",
          survey = na.omit(voter2003.2023[voter2003.2023$YEAR == year[i],
                                          c(issues[j], "SEX", "AGE", 
                                            "EDUCATION", "PREFECTURE", 
                                            "DENSITY", "COLLEGE_PERCENT", 
                                            "FOREIGNER_PERCENT", 
                                            "PRIMERY_INDUSTRY_PERCENT", 
                                            "SECONDARY_INDUSTRY_PERCENT", 
                                            "CRIME_RATE", "TURNOUT_PERCENT", 
                                            "LDP_PERCENT", "CDP_PERCENT", 
                                            "HOP_PERCENT", "JCP_PERCENT", 
                                            "JIP_PERCENT")]),
          census = census[census$YEAR == year[i],], 
          bin.proportion = "PROPORTION", 
          best.subset = FALSE,
          pca = FALSE,
          lasso = FALSE,
          gb = FALSE,
          svm = FALSE,
          mrp = TRUE
        )
        mrp.result <- rbind(mrp.result, 
                            data.frame(PREFECTURE = mrp$classifiers[,1], 
                                       YEAR = year[i], 
                                       QUESTION = issues[j], 
                                       POSITON = mrp$classifiers[,2]))
      }
      if (i %in% 7:8) {
        mrp <- auto_MrP(
          y = issues[j],
          L1.x = c("SEX", "AGE", "EDUCATION"),
          L2.x = c("DENSITY", "COLLEGE_PERCENT", "FOREIGNER_PERCENT", 
                   "PRIMERY_INDUSTRY_PERCENT", "SECONDARY_INDUSTRY_PERCENT", 
                   "CRIME_RATE", "TURNOUT_PERCENT", "LDP_PERCENT", 
                   "CDP_PERCENT", "DPFP_PERCENT", "JCP_PERCENT", "JIP_PERCENT"),
          L2.unit = "PREFECTURE",
          survey = na.omit(voter2003.2023[voter2003.2023$YEAR == year[i],
                                          c(issues[j], "SEX", "AGE", 
                                            "EDUCATION", "PREFECTURE", 
                                            "DENSITY", "COLLEGE_PERCENT", 
                                            "FOREIGNER_PERCENT", 
                                            "PRIMERY_INDUSTRY_PERCENT", 
                                            "SECONDARY_INDUSTRY_PERCENT", 
                                            "CRIME_RATE", "TURNOUT_PERCENT", 
                                            "LDP_PERCENT", "CDP_PERCENT", 
                                            "DPFP_PERCENT", "JCP_PERCENT", 
                                            "JIP_PERCENT")]),
          census = census[census$YEAR == year[i],], 
          bin.proportion = "PROPORTION", 
          best.subset = FALSE,
          pca = FALSE,
          lasso = FALSE,
          gb = FALSE,
          svm = FALSE,
          mrp = TRUE
        )
        mrp.result <- rbind(mrp.result, 
                            data.frame(PREFECTURE = mrp$classifiers[,1], 
                                       YEAR = year[i], 
                                       QUESTION = issues[j], 
                                       POSITON = mrp$classifiers[,2]))
      }
    }
  }
}
mrp.result <- spread(mrp.result, key = QUESTION, value = mrp)
mrp.result$PREFECTURE_NAME <- recode(
  mrp.result$PREFECTURE, 
  `1` = "Hokkaido", `2` = "Aomori", `3` = "Iwate", `4` = "Miyagi", 
  `5` = "Akita", `6` = "Yamagata", `7` = "Fukushima", `8` = "Ibaraki", 
  `9` = "Tochigi", `10` = "Gunma", `11` = "Saitama", `12` = "Chiba", 
  `13` = "Tokyo", `14` = "Kanagawa", `15` = "Niigata", `16` = "Toyama", 
  `17` = "Ishikawa", `18` = "Fukui", `19` = "Yamanashi", `20` = "Nagano", 
  `21` = "Gifu", `22` = "Shizuoka", `23` = "Aichi", `24` = "Mie", 
  `25` = "Shiga", `26` = "Kyoto", `27` = "Osaka", `28` = "Hyogo", 
  `29` = "Nara", `30` = "Wakayama", `31` = "Tottori", `32` = "Shimane", 
  `33` = "Okayama", `34` = "Hiroshima", `35` = "Yamaguchi", `36` = "Tokushima", 
  `37` = "Kagawa", `38` = "Ehime", `39` = "Kochi", `40` = "Fukuoka", 
  `41` = "Saga", `42` = "Nagasaki", `43` = "Kumamoto", `44` = "Oita", 
  `45` = "Miyazaki", `46` = "Kagoshima", `47` = "Okinawa"
)
mrp.result$PREFECTURE_NAME <- factor(
  mrp.result$PREFECTURE_NAME, 
  levels = c("Hokkaido", "Aomori", "Iwate", "Miyagi", 
             "Akita", "Yamagata", "Fukushima", "Ibaraki", 
             "Tochigi", "Gunma", "Saitama", "Chiba", 
             "Tokyo", "Kanagawa", "Niigata", "Toyama", 
             "Ishikawa", "Fukui", "Yamanashi", "Nagano", 
             "Gifu", "Shizuoka", "Aichi", "Mie", 
             "Shiga", "Kyoto", "Osaka", "Hyogo", 
             "Nara", "Wakayama", "Tottori", "Shimane", 
             "Okayama", "Hiroshima", "Yamaguchi", "Tokushima", 
             "Kagawa", "Ehime",  "Kochi", "Fukuoka", 
             "Saga", "Nagasaki", "Kumamoto", "Oita", 
             "Miyazaki", "Kagoshima", "Okinawa")
)
mrp.result$PREFECTURE <- as.numeric(mrp.result$PREFECTURE)
mrp.result <- mrp.result[,c("PREFECTURE", "PREFECTURE_NAME", "YEAR", "LDP", "ORDER")]
mrp.result <- census %>% 
  select(-SEX, -AGE, -EDUCATION, -POPULATION, -PREFECTURE_POPULATION, -PROPORTION) %>% 
  unique() %>% 
  left_join(mrp.result, ., by = c("PREFECTURE", "PREFECTURE_NAME", "YEAR"))

# output
save(mrp.result, file = "MRP_PREFECTURE_UTAS.rda")
